#!/usr/bin/python
import csv
import sys
import time
#print(type(sys.stdin))


def mystrip(item='mystrip'):
    """Return *item* with surrounding line breaks, tabs and punctuation removed.

    Every leading and trailing character drawn from newline, carriage
    return, tab, double quote, single quote, '.', '#', '$', ':' and '-' is
    dropped in one pass, so the result does not depend on the order in which
    those characters happen to appear (for example '-"word"-' becomes
    'word'). Spaces and characters inside the string are left untouched.
    """
    return item.strip('\n\r\t"\'.#$:-')

row_num = -1

# Five-character date prefixes, '2008/' through '2016/'.
year = ['%d/' % each_year for each_year in range(2008, 2017)]

# Module-level counter: the first year that is not listed in `year`.
item1 = 2017


# Read all of standard input up front; each entry keeps its line terminator.
initial = sys.stdin.readlines()

# Index of the next unread entry in `initial`.
initial_num = 0

# Accumulator for the extracted record texts.
splic = []
splicn = 0




# Extract the text of every record dated 2016 from the raw input lines.
#
# A record starts on a line beginning with a date prefix such as '2016/' and
# may be wrapped over several physical lines. For each 2016 record the pieces
# are joined, the leading date field (everything up to the first comma) is
# removed, a bytes-literal wrapper (b'...' or b"...") is peeled off, and the
# cleaned text is appended to `splic`.
#
# '2017/' is not listed in `year`, so it is added here as an extra record
# boundary; without it, 2017 lines would be glued onto the preceding 2016
# record.
record_starts = tuple(year) + ('2017/',)

while initial_num < len(initial):
    # Anything that does not open a 2016 record is skipped.
    if initial[initial_num][0:5] != '2016/':
        initial_num += 1
        continue

    initial[initial_num] = mystrip(initial[initial_num])
    pieces = [initial[initial_num]]
    initial_num += 1

    # Lines that do not start with a date prefix are wrapped continuations of
    # the current record; merge them. Running off the end of the input simply
    # closes the record, so the final record is kept as well.
    while (initial_num < len(initial)
           and not initial[initial_num].startswith(record_starts)):
        initial[initial_num] = mystrip(initial[initial_num])
        pieces.append(initial[initial_num])
        initial_num += 1
    stritem = ''.join(pieces)

    # Remove the date: everything before the first comma.
    notyear = stritem.find(',')
    if notyear == -1:
        # Without a comma the date cannot be separated from the text; treat
        # the input as malformed and stop.
        print("处理原始字符串出现问题")
        sys.exit()
    stritem = mystrip(stritem[notyear:].strip(','))

    # Peel off a bytes-literal wrapper. Only the single leading 'b' is
    # removed, so text that happens to end in 'b' keeps its last letter.
    for quote in ("'", '"'):
        if stritem.startswith('b' + quote):
            stritem = stritem[1:].strip(quote)

    # Repeat the clean-up a few times so punctuation uncovered by an earlier
    # pass is removed too.
    for _ in range(5):
        stritem = mystrip(stritem)

    splic.append(stritem)

# Load the stop-word list, one entry per line, into `txt_tables`.
txt_tables = []
# `with` closes the file even if reading fails part-way through.
with open("/home/allen/mypython/stop-word-list.txt", "r", encoding='utf-8') as f:
    for stopline in f:
        # Drop the line terminator; '\r' is included so a file saved with
        # Windows line endings still yields entries that can match a word.
        txt_tables.append(stopline.strip('\r\n'))
# Debug output of the entry type; guarded so an empty list does not raise.
if txt_tables:
    print(type(txt_tables[0]))


# Split every extracted record into words, normalise each word and keep the
# ones that are not stop words.
#
# The stop words are lower-cased into a set once, so each word costs a single
# hash lookup instead of a scan over the whole list. A word that is a stop
# word is reported on stdout once, however many list entries it matches.
stop_words = {txt.lower() for txt in txt_tables}

result = []
for line in splic:
    for word in line.split():
        # Lower-case, trim surrounding punctuation (two passes), then commas.
        word = mystrip(mystrip(word.lower())).strip(',')
        if word in stop_words:
            print(word)
            print('stop_word')
        else:
            result.append(word)
#print(result)

# Write the surviving words to the intermediate CSV, one word per row.
with open('/home/allen/mypython/middle.csv', 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows([kept_word] for kept_word in result)








